In [1]:
import plotly.express as px
import pandas as pd
import numpy as np
In [2]:
df = pd.read_csv("bangladesh_divisions_dataset.csv")
In [3]:
df.head()
Out[3]:
Location Soil_Type Fertility_Index Land_Use_Type Average_Rainfall(mm) Temperature(°C) Crop_Suitability Season Satellite_Observation_Date Remarks
0 Sylhet Loamy 62 Agricultural 72 28.6 Wheat Monsoon 2024-09-24 Requires attention
1 Dhaka Sandy 63 Unused 118 23.8 Maize Autumn 2024-01-31 Moderate potential
2 Rangpur Peaty 51 Agricultural 106 32.0 Maize Autumn 2024-03-11 Requires attention
3 Khulna Sandy 67 Barren 336 31.6 Wheat Autumn 2024-09-29 Low potential
4 Rangpur Peaty 63 Agricultural 237 20.1 Rice Winter 2024-04-01 Moderate potential
In [4]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 10 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Location                    2000 non-null   object 
 1   Soil_Type                   2000 non-null   object 
 2   Fertility_Index             2000 non-null   int64  
 3   Land_Use_Type               2000 non-null   object 
 4   Average_Rainfall(mm)        2000 non-null   int64  
 5   Temperature(°C)             2000 non-null   float64
 6   Crop_Suitability            2000 non-null   object 
 7   Season                      2000 non-null   object 
 8   Satellite_Observation_Date  2000 non-null   object 
 9   Remarks                     2000 non-null   object 
dtypes: float64(1), int64(2), object(7)
memory usage: 156.4+ KB
In [5]:
df.describe()
Out[5]:
Fertility_Index Average_Rainfall(mm) Temperature(°C)
count 2000.00000 2000.000000 2000.000000
mean 70.10450 223.136000 27.330250
std 17.97699 100.548543 4.341251
min 40.00000 50.000000 20.000000
25% 54.00000 137.000000 23.500000
50% 70.00000 222.500000 27.300000
75% 86.00000 308.000000 31.000000
max 100.00000 400.000000 35.000000
In [6]:
df.isnull().sum()
Out[6]:
Location                      0
Soil_Type                     0
Fertility_Index               0
Land_Use_Type                 0
Average_Rainfall(mm)          0
Temperature(°C)               0
Crop_Suitability              0
Season                        0
Satellite_Observation_Date    0
Remarks                       0
dtype: int64
In [7]:
df.duplicated().sum()
Out[7]:
0
In [8]:
print("Unique locations: ", df['Location'].unique())
Unique locations:  ['Sylhet' 'Dhaka' 'Rangpur' 'Khulna' 'Rajshahi' 'Chattogram' 'Barishal'
 'Mymensingh']
In [9]:
df.rename(columns={
    'Average_Rainfall(mm)': 'RainfallMM',
    'Temperature(°C)': 'TemperatureC'
}, inplace=True)
In [10]:
df
Out[10]:
Location Soil_Type Fertility_Index Land_Use_Type RainfallMM TemperatureC Crop_Suitability Season Satellite_Observation_Date Remarks
0 Sylhet Loamy 62 Agricultural 72 28.6 Wheat Monsoon 2024-09-24 Requires attention
1 Dhaka Sandy 63 Unused 118 23.8 Maize Autumn 2024-01-31 Moderate potential
2 Rangpur Peaty 51 Agricultural 106 32.0 Maize Autumn 2024-03-11 Requires attention
3 Khulna Sandy 67 Barren 336 31.6 Wheat Autumn 2024-09-29 Low potential
4 Rangpur Peaty 63 Agricultural 237 20.1 Rice Winter 2024-04-01 Moderate potential
... ... ... ... ... ... ... ... ... ... ...
1995 Barishal Loamy 65 Unused 278 20.3 Spices Summer 2024-02-12 Requires attention
1996 Chattogram Clay 52 Residential 224 29.3 Jute Winter 2024-08-20 Moderate potential
1997 Rangpur Clay 97 Barren 84 25.6 Jute Summer 2024-09-07 Requires attention
1998 Rajshahi Sandy 85 Residential 209 23.5 Rice Summer 2024-08-23 Moderate potential
1999 Sylhet Sandy 73 Agricultural 134 20.2 Wheat Winter 2024-11-01 Moderate potential

2000 rows × 10 columns

In [11]:
fig2 = px.pie(df, names='Land_Use_Type', title='Proportion of Land Use Types',
              color_discrete_sequence=px.colors.sequential.RdBu)
fig2.show()
In [12]:
fig1 = px.bar(df, x='Location', color='Soil_Type', title='Count of Soil Types Across Locations',
              color_discrete_sequence=px.colors.sequential.Blues[2:])
fig1.show()
In [13]:
fig3 = px.histogram(df, x='Fertility_Index', nbins=10, title='Distribution of Fertility Score',
                    color_discrete_sequence=['#1F77B4'])
fig3.update_layout(bargap=0.2)
fig3.show()
In [14]:
fig4 = px.histogram(df, x='RainfallMM', nbins=10, title='Distribution of Rainfall',
                    color_discrete_sequence=px.colors.sequential.Cividis)
fig4.update_layout(bargap=0.2)
fig4.show()
In [15]:
fig5 = px.histogram(df, x='TemperatureC', nbins=10, title='Distribution of Temperature',
                    color_discrete_sequence=['#FFA15A'])
fig5.update_layout(bargap=0.2)
fig5.show()
In [16]:
fig6 = px.scatter(df, x='RainfallMM', y='TemperatureC', title='Average Rainfall vs. Temperature',
                  color='RainfallMM', color_continuous_scale='Viridis')

fig6.update_layout(plot_bgcolor='rgba(0, 0, 0, 0)', title_font_size=20, font=dict(size=12))
fig6.show()
In [17]:
fig7 = px.scatter(df, x='Fertility_Index', y='RainfallMM', title='Fertility Index vs. Average Rainfall',
                  color='Fertility_Index', color_continuous_scale='Blues')

fig7.update_layout(plot_bgcolor='rgba(0, 0, 0, 0)', title_font_size=20, font=dict(size=12))
fig7.show()
In [18]:
fig8 = px.box(df, x='Season', y='TemperatureC', title='Temperature Distribution Across Seasons',
              color='Season', color_discrete_sequence=px.colors.qualitative.Set2)

fig8.update_layout(plot_bgcolor='rgba(0, 0, 0, 0)', title_font_size=20, font=dict(size=12))
fig8.show()
In [19]:
fig9 = px.violin(df, y='TemperatureC', x='Soil_Type', box=True, points="all", 
                 title="Temperature Distribution by Soil Type",
                 color='Soil_Type',
                 color_discrete_sequence=px.colors.qualitative.Set2)

fig9.update_layout(
    width=900,
    height=700,
    title_font_size=20,
    font=dict(size=12),
    plot_bgcolor='rgba(0, 0, 0, 0)'
)

fig9.show()
In [20]:
fig10 = px.bar(df.groupby('Soil_Type')['Crop_Suitability'].value_counts().reset_index(name='Count'), 
               x='Soil_Type', y='Count', color='Crop_Suitability', 
               title='Crop Suitability by Soil Type',
               color_discrete_sequence=px.colors.sequential.Plasma)

fig10.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',
    title_font_size=20,
    font=dict(size=12)
)

fig10.show()
In [21]:
fig11 = px.sunburst(df, path=['Soil_Type', 'Land_Use_Type', 'Crop_Suitability'], 
                   title="Crop Suitability Hierarchy by Soil and Land Use",
                   color='Crop_Suitability',
                   color_discrete_sequence=px.colors.sequential.RdBu)

fig11.update_layout(
    width=900,
    height=700,
    title_font_size=20,
    font=dict(size=12),
    plot_bgcolor='rgba(0, 0, 0, 0)'
)

fig11.show()
In [22]:
corr_matrix = df[['Fertility_Index', 'RainfallMM', 'TemperatureC']].corr()
corr_matrix = corr_matrix.map(lambda x: f"{x:.3f}")

fig12 = px.imshow(corr_matrix, 
                 labels=dict(x="Variables", y="Variables", color="Correlation Coefficient"),
                 title='Correlation Matrix for Fertility Index, Rainfall, and Temperature',
                 color_continuous_scale='RdBu', text_auto=True)

fig12.update_layout(plot_bgcolor='rgba(0, 0, 0, 0)', title_font_size=20, font=dict(size=12))
fig12.show()
In [33]:
fig13 = px.scatter_matrix(df,
                         dimensions=['Fertility_Index', 'RainfallMM', 'TemperatureC'],
                         title="Pair Plot: Fertility Index, Rainfall, Temperature",
                         color="Fertility_Index",
                         color_continuous_scale='Viridis'
                        )

fig13.update_layout(
    width=900,
    height=700,
    title_font_size=20,
    font=dict(size=12),
    plot_bgcolor='rgba(0, 0, 0, 0)'
)

fig13.show()
In [34]:
fig14 = px.scatter_3d(df, x='Fertility_Index', y='RainfallMM', z='TemperatureC',
                     title="3D Plot: Fertility Index vs Rainfall vs Temperature",
                     color='Fertility_Index',
                     color_continuous_scale='Viridis')

fig14.update_layout(
    width=900,
    height=700,
    title_font_size=20,
    font=dict(size=12),
    plot_bgcolor='rgba(0, 0, 0, 0)'
)

fig14.show()
In [ ]: